<?php namespace Phpcmf\Model;

/**
 * Model that records search-engine crawler ("spider") visits in the
 * `app_spider` table.
 */
class Spider extends \Phpcmf\Model
{

    /**
     * Maps the crawler display name returned by get_spider() to the numeric
     * id that save_log() stores in the `spider` column.
     */
    public $spider = [
        '百度' => 1,
        '谷歌' => 2,
        '搜搜' => 3,
        '必应' => 4,
        '雅虎' => 5,
        '搜狗' => 6,
        '360搜索' => 7,
        '有道' => 8,
    ];

    /**
     * Identify a crawler from a User-Agent string.
     *
     * Signatures are matched case-insensitively and in the order listed
     * below; the first match wins.
     *
     * @param string $string Raw User-Agent value.
     * @return string Crawler display name (a key of $this->spider), or ''
     *                when no known signature is found.
     */
    public function get_spider($string) {

        // A missing header (null) or any non-scalar value cannot match a
        // signature; bail out before stripos() complains about the type.
        if (!is_scalar($string)) {
            return '';
        }

        $agent = (string)$string;
        if ($agent === '') {
            return '';
        }

        // Ordered list: User-Agent signature => crawler display name.
        $signatures = [
            'baiduspider' => '百度',
            'googlebot' => '谷歌',
            'sosospider' => '搜搜',
            'bingbot' => '必应',
            'Yahoo! Slurp' => '雅虎',
            'Sogou' => '搜狗',
            '360Spider' => '360搜索',
            'YoudaoBot' => '有道',
        ];

        foreach ($signatures as $needle => $name) {
            if (stripos($agent, (string)$needle) !== false) {
                return $name;
            }
        }

        return '';
    }

    /**
     * Look up the numeric id of a crawler display name.
     *
     * @param string $name Crawler display name as returned by get_spider().
     * @return int The id from $this->spider, or 0 when the name is unknown.
     */
    private function _get_spider_id($name) {

        // Only strings/ints are valid array keys; anything else is unknown.
        if (!is_string($name) && !is_int($name)) {
            return 0;
        }

        // get_spider() returns '' for unknown agents, so an absent key is a
        // normal case and must not raise an undefined-key warning.
        return isset($this->spider[$name]) ? (int)$this->spider[$name] : 0;
    }

    /**
     * Check a host name against the domain whitelists.
     *
     * Two optional config files under WRITEPATH.'config/' are consulted:
     * `domain_spider.php` (the domain must be one of the returned values)
     * and `domain_site.php` (the domain must be a key of the returned array).
     *
     * @param string $domain Host name to check.
     * @return int 1 when the domain is whitelisted, 0 otherwise.
     */
    private function _domain_check($domain) {

        if (!is_string($domain) || $domain === '') {
            return 0;
        }

        $spider_file = WRITEPATH.'config/domain_spider.php';
        if (is_file($spider_file)) {
            $bmd = require $spider_file;
            // A file without a `return` yields int 1, so verify the type;
            // strict comparison avoids loose matches such as 'a.com' == 0.
            if (is_array($bmd) && in_array($domain, $bmd, true)) {
                return 1;
            }
        }

        // Guard this file the same way: an unguarded require is a fatal
        // error when the file has not been generated yet.
        $site_file = WRITEPATH.'config/domain_site.php';
        if (is_file($site_file)) {
            $site = require $site_file;
            if (is_array($site) && isset($site[$domain])) {
                return 1;
            }
        }

        return 0;
    }

    /**
     * Store one crawler visit in the `app_spider` table.
     *
     * @param string $url       Visited URL; its host part is checked against
     *                          the domain whitelists.
     * @param string $spider    Crawler display name as returned by get_spider().
     * @param mixed  $is_mobile Truthy when the visit is a mobile one; stored as 1/0.
     * @param string $title     Page title; an empty title is rejected.
     * @return string '' when the row was submitted for insertion, otherwise
     *                a message naming the reason it was skipped: empty title,
     *                unknown crawler, or domain rejected by the whitelist check.
     */
    public function save_log($url, $spider, $is_mobile, $title) {

        if (!$title) {
            return '标题为空';
        }

        $spider_id = $this->_get_spider_id($spider);
        if (!$spider_id) {
            return '蜘蛛不存在';
        }

        // parse_url() returns false for seriously malformed URLs and omits
        // 'host' for relative ones; treat both as "no domain".
        $arr = is_string($url) ? parse_url($url) : false;
        $domain = (is_array($arr) && isset($arr['host'])) ? $arr['host'] : '';
        if (!$this->_domain_check($domain)) {
            return '域名排查';
        }

        // The User-Agent header is optional; fall back to an empty string
        // instead of reading an undefined index.
        $agent = isset($_SERVER['HTTP_USER_AGENT']) ? (string)$_SERVER['HTTP_USER_AGENT'] : '';

        $this->table('app_spider')->insert([
            'site' => SITE_ID,
            'spider' => $spider_id,
            'domain' => $domain,
            'title' => $title,
            'url' => $url,
            'mobile' => $is_mobile ? 1 : 0,
            'agent' => $agent,
            'inputtime' => SYS_TIME,
        ]);

        return '';
    }

}